# Load the Zillow price table from the working directory and preview it.
zillow <- read.csv("price.csv")
head(zillow)
# Drop every row that contains an NA; NAs are only present if no data was
# collected for the location.
zillow <- na.omit(zillow)
# Keep only the states that this analysis treats as East Coast states
# (everything else is discarded from this table).
eastcoast <- subset(
  zillow,
  State %in% c(
    "CT", "NY", "VA", "FL", "NJ", "NC", "SC",
    "GA", "DE", "MA", "MD", "ME", "RI", "NH"
  )
)
# Annual view: columns 1-6 are kept as they are; from column 7 onward only
# every 12th column up to column 79 is kept (7, 19, 31, 43, 55, 67, 79).
# Column 7 is November.2010 and column 79 is November.2016, so this leaves one
# November reading per year. Selecting the columns in one step replaces a
# chain of positional drops whose indices each depended on the previous drop.
eastcoast_annual <- eastcoast[, c(1:6, seq(7, 79, by = 12))]
# Keep only the states that this analysis treats as West Coast states.
westcoast <- subset(zillow, State %in% c("CA", "OR", "WA", "HI", "AK"))
# Annual view: columns 1-6 plus every 12th column from 7 to 79
# (November.2010 ... November.2016). Built from the full `westcoast` table,
# so it must be computed before `westcoast` is trimmed below.
westcoast_annual <- westcoast[, c(1:6, seq(7, 79, by = 12))]
# Monthly table without columns 2-6 (only column 1 and the price columns).
westcoast <- westcoast[-(2:6)]
# Slim annual tables: column 1 (City.Code) followed by the seven November
# columns. These are transposed later in the file.
westAnn <- westcoast_annual[-(2:6)]
eastAnn <- eastcoast_annual[-(2:6)]
# Use dplyr to add two derived columns to each annual table:
#   Price.Diff - November.2016 minus November.2010
#   Avg.Price  - row mean of columns 7 to 13 (the seven November columns)
# NOTE(review): `%>%` and mutate() need dplyr attached; the library() call is
# not visible in this section.
eastcoast_annual <- eastcoast_annual %>%
  mutate(
    Price.Diff = November.2016 - November.2010,
    Avg.Price = rowMeans(eastcoast_annual[7:13])
  )
westcoast_annual <- westcoast_annual %>%
  mutate(
    Price.Diff = November.2016 - November.2010,
    Avg.Price = rowMeans(westcoast_annual[7:13])
  )
# clean the data
# Each table is passed through outlierKD2() once per column of interest
# (November.2010, November.2016, Avg.Price, Price.Diff), feeding the result of
# one call into the next. outlierKD2() is defined outside this section.
# NOTE(review): the t-test degrees of freedom later in the file differ per
# column, so rm = TRUE presumably blanks outlier values rather than dropping
# whole rows - confirm against the helper.
# Lines starting with `##` are the console output recorded for the call above
# them; they are kept as comments so the script still parses.
westcoast_annualClean <- outlierKD2(westcoast_annual, November.2010, rm = TRUE)
## Outliers identified: 64 Proportion (%) of outliers: 5.5 Mean of the outliers: 5337 Mean without removing outliers: 1733 Mean if we remove outliers: 1534 Outliers successfully removed
westcoast_annualClean <- outlierKD2(westcoast_annualClean, November.2016, rm = TRUE)
## Outliers
## identified: 70 Proportion (%) of outliers: 6.1 Mean of the outliers:
## 6518 Mean without removing outliers: 2093 Mean if we remove outliers:
## 1823 Outliers successfully removed
westcoast_annualClean <- outlierKD2(westcoast_annualClean, Avg.Price, rm = TRUE)
## Outliers
## identified: 72 Proportion (%) of outliers: 6.3 Mean of the outliers:
## 5489 Mean without removing outliers: 1837 Mean if we remove outliers:
## 1608 Outliers successfully removed
westcoast_annualClean <- outlierKD2(westcoast_annualClean, Price.Diff, rm = TRUE)
## Outliers
## identified: 76 Proportion (%) of outliers: 6.6 Mean of the outliers:
## 1512 Mean without removing outliers: 359 Mean if we remove outliers: 283
## Outliers successfully removed
eastcoast_annualClean <- outlierKD2(eastcoast_annual, November.2010, rm = TRUE)
## Outliers
## identified: 171 Proportion (%) of outliers: 4.5 Mean of the outliers:
## 3417 Mean without removing outliers: 1458 Mean if we remove outliers:
## 1370 Outliers successfully removed
eastcoast_annualClean <- outlierKD2(eastcoast_annualClean, November.2016, rm = TRUE)
## Outliers
## identified: 162 Proportion (%) of outliers: 4.2 Mean of the outliers:
## 3953 Mean without removing outliers: 1598 Mean if we remove outliers:
## 1498 Outliers successfully removed
eastcoast_annualClean <- outlierKD2(eastcoast_annualClean, Avg.Price, rm = TRUE)
## Outliers
## identified: 171 Proportion (%) of outliers: 4.5 Mean of the outliers:
## 3583 Mean without removing outliers: 1513 Mean if we remove outliers:
## 1420 Outliers successfully removed
eastcoast_annualClean <- outlierKD2(eastcoast_annualClean, Price.Diff, rm = TRUE)
## Outliers
## identified: 168 Proportion (%) of outliers: 4.4 Mean of the outliers:
## 595 Mean without removing outliers: 140 Mean if we remove outliers: 120
## Outliers successfully removed
# Transpose a slim annual table (City.Code followed by one price column per
# year) so that each row is a date and each column is a city code.
#
# annual: data frame whose first column is City.Code.
# Returns a data frame with a leading `Date` column (the former column names)
# and one column per City.Code, with row names reset to 1..n.
transpose_city_prices <- function(annual) {
  # City codes become the row names so they turn into column names after t().
  rownames(annual) <- annual$City.Code
  transposed <- as.data.frame(t(annual[-1]))
  # Strip spaces and commas from the new column names.
  names(transposed) <- str_replace_all(names(transposed), c(" " = "" , "," = ""))
  transposed <- cbind(Date = rownames(transposed), transposed)
  # seq_len() stays valid for a zero-row table, unlike 1:nrow().
  rownames(transposed) <- seq_len(nrow(transposed))
  transposed
}

westAnn <- transpose_city_prices(westAnn)
head(westAnn)
eastAnn <- transpose_city_prices(eastAnn)
head(eastAnn)
# Basic summary statistics for columns 7-15 of each annual table (the seven
# November columns plus Price.Diff and Avg.Price).
summary(eastcoast_annual[, 7:15])
summary(westcoast_annual[, 7:15])
# Formatted summary tables; xkablesummary() is defined outside this section.
xkablesummary(westcoast_annual[7:15], title = "West Coast Summary Annually")
xkablesummary(eastcoast_annual[7:15], title = "East Coast Summary Annually")
# Same tables restricted to columns 7, 10 and 13-15.
xkablesummary(westcoast_annual[c(7,10,13:15)], title = "West Coast Summary Annually")
xkablesummary(eastcoast_annual[c(7,10,13:15)], title = "East Coast Summary Annually")
# Outlier check: collect the values boxplot.stats() reports as outliers, then
# print the smallest one for each coast and year.
outWest2010 <- boxplot.stats(westcoast_annual[["November.2010"]])[["out"]]
outWest2016 <- boxplot.stats(westcoast_annual[["November.2016"]])[["out"]]
outEast2010 <- boxplot.stats(eastcoast_annual[["November.2010"]])[["out"]]
outEast2016 <- boxplot.stats(eastcoast_annual[["November.2016"]])[["out"]]
min(outWest2010)
min(outWest2016)
min(outEast2010)
min(outEast2016)
library(ggplot2)
# West Coast histograms of rental prices, drawn with and without the
# outlier-cleaned table, for November 2010 (red) and November 2016 (blue).
ggplot(westcoast_annualClean, aes(November.2010)) +
  geom_histogram(fill = "red", alpha = 0.4, bins = 70) +
  labs(title = "Rental Price Count on the West Coast in November 2010", x = "Rental Prices", y = "Frequency")
ggplot(westcoast_annual, aes(November.2010)) +
  geom_histogram(fill = "red", alpha = 0.4, bins = 70) +
  labs(title = "Rental Price Count on the West Coast in November 2010", x = "Rental Prices", y = "Frequency")
ggplot(westcoast_annual, aes(November.2016)) +
  geom_histogram(fill = "blue", alpha = 0.4, bins = 70) +
  labs(title = "Rental Price Count on the West Coast in November 2016", x = "Rental Prices", y = "Frequency")
ggplot(westcoast_annualClean, aes(November.2016)) +
  geom_histogram(fill = "blue", alpha = 0.4, bins = 70) +
  labs(title = "Rental Price Count on the West Coast in November 2016", x = "Rental Prices", y = "Frequency")
# Both years overlaid, uncleaned data, fixed fill colours (no legend).
ggplot(westcoast_annual) +
  geom_histogram(aes(November.2010), fill = "red", alpha = 0.4, bins = 70) +
  geom_histogram(aes(November.2016), fill = "blue", alpha = 0.4, bins = 70) +
  labs(title = "Rental Price Count on the West Coast from November 2010 to November 2016", x = "Rental Prices", y = "Frequency")
# Both years overlaid, cleaned data, with a legend. The year label is mapped
# to `fill`, so the manual palette has to be attached to the fill scale;
# a colour scale would leave the default fill palette in place.
colors <- c("November.2010" = "red", "November.2016" = "blue")
ggplot(westcoast_annualClean) +
  geom_histogram(aes(November.2010, fill = "November.2010"), alpha = 0.4, bins = 40) +
  geom_histogram(aes(November.2016, fill = "November.2016"), alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the West Coast from November 2010 to November 2016", x = "Rental Prices",
       y = "Frequency", fill = "Legend") +
  scale_fill_manual(values = colors)
# East Coast histograms of rental prices, drawn with and without the
# outlier-cleaned table, for November 2010 (red) and November 2016 (blue).
ggplot(eastcoast_annual, aes(November.2010)) +
  geom_histogram(fill = "red", alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the East Coast from November 2010", x = "Rental Prices", y = "Frequency")
ggplot(eastcoast_annualClean, aes(November.2010)) +
  geom_histogram(fill = "red", alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the East Coast from November 2010", x = "Rental Prices", y = "Frequency")
# This plot shows November.2016, so it uses the 2016 colour and title (it was
# previously labelled and coloured as the 2010 plot).
ggplot(eastcoast_annual, aes(November.2016)) +
  geom_histogram(fill = "blue", alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the East Coast from November 2016", x = "Rental Prices", y = "Frequency")
ggplot(eastcoast_annualClean, aes(November.2016)) +
  geom_histogram(fill = "blue", alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the East Coast from November 2016", x = "Rental Prices", y = "Frequency")
# Both years overlaid, uncleaned data, fixed fill colours (no legend).
ggplot(eastcoast_annual) +
  geom_histogram(aes(November.2010), fill = "red", alpha = 0.4, bins = 40) +
  geom_histogram(aes(November.2016), fill = "blue", alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the East Coast from November 2010 to November 2016", x = "Rental Prices", y = "Frequency")
# Both years overlaid, cleaned data, with a legend. The year label is mapped
# to `fill`, so the manual palette has to be attached to the fill scale.
colors <- c("November.2010" = "red", "November.2016" = "blue")
ggplot(eastcoast_annualClean) +
  geom_histogram(aes(November.2010, fill = "November.2010"), alpha = 0.4, bins = 40) +
  geom_histogram(aes(November.2016, fill = "November.2016"), alpha = 0.4, bins = 40) +
  labs(title = "Rental Price Count on the East Coast from November 2010 to November 2016", x = "Rental Prices",
       y = "Frequency", fill = "Legend") +
  scale_fill_manual(values = colors)
# Per-state boxplots (with and without outliers) plus one coast-wide boxplot,
# for the West Coast in November 2010 and November 2016.
ggplot(westcoast_annual, aes(x = State, y = November.2010, color = State)) +
  geom_boxplot() +
  labs(title = "West Coast States November 2010 Rent Prices with Outliers")
ggplot(westcoast_annualClean, aes(x = State, y = November.2010, color = State)) +
  geom_boxplot() +
  labs(title = "West Coast States November 2010 Rent Prices without Outliers")
ggplot(westcoast_annualClean, aes(x = November.2010)) +
  geom_boxplot(color = "blue", outlier.shape = 9, outlier.size = 2, outlier.color = "blue") +
  labs(title = "Entire West Coast November 2010 Boxplot") +
  scale_x_continuous(breaks = seq(500, 4000, by = 500))
ggplot(westcoast_annual, aes(x = State, y = November.2016, color = State)) +
  geom_boxplot() +
  labs(title = "West Coast States November 2016 Rent Prices with Outliers")
ggplot(westcoast_annualClean, aes(x = State, y = November.2016, color = State)) +
  geom_boxplot() +
  labs(title = "West Coast States November 2016 Rent Prices without Outliers")
ggplot(westcoast_annualClean, aes(x = November.2016)) +
  geom_boxplot(color = "blue", outlier.shape = 9, outlier.size = 2, outlier.color = "blue") +
  labs(title = "Entire West Coast November 2016 Boxplot") +
  scale_x_continuous(breaks = seq(500, 4000, by = 500))
# The same six boxplots for the East Coast.
ggplot(eastcoast_annual, aes(x = State, y = November.2010, color = State)) +
  geom_boxplot() +
  labs(title = "East Coast States November 2010 Rent Prices with Outliers")
ggplot(eastcoast_annualClean, aes(x = State, y = November.2010, color = State)) +
  geom_boxplot() +
  labs(title = "East Coast States November 2010 Rent Prices without Outliers")
ggplot(eastcoast_annualClean, aes(x = November.2010)) +
  geom_boxplot(color = "red", outlier.shape = 9, outlier.size = 2, outlier.color = "red") +
  labs(title = "Entire East Coast November 2010 Boxplot") +
  scale_x_continuous(breaks = seq(500, 4000, by = 500))
ggplot(eastcoast_annual, aes(x = State, y = November.2016, color = State)) +
  geom_boxplot() +
  labs(title = "East Coast States November 2016 Rent Prices with Outliers")
ggplot(eastcoast_annualClean, aes(x = State, y = November.2016, color = State)) +
  geom_boxplot() +
  labs(title = "East Coast States November 2016 Rent Prices without Outliers")
ggplot(eastcoast_annualClean, aes(x = November.2016)) +
  geom_boxplot(color = "red", outlier.shape = 9, outlier.size = 2, outlier.color = "red") +
  labs(title = "Entire East Coast November 2016 Boxplot") +
  scale_x_continuous(breaks = seq(500, 4000, by = 500))
# Scatter plots of November 2016 against November 2010 prices, coloured by
# state, with and without outliers.
ggplot(westcoast_annual, aes(x = November.2010, y = November.2016, color = State)) +
  geom_point() +
  labs(title = "West Coast November 2010 and November 2016 with Outliers", x = "November.2010 Rent Prices", y = "November.2016 Rent Prices")
ggplot(westcoast_annualClean, aes(x = November.2010, y = November.2016, color = State)) +
  geom_point() +
  labs(title = "West Coast November 2010 and November 2016 without Outliers", x = "November.2010 Rent Prices", y = "November.2016 Rent Prices")
ggplot(eastcoast_annual, aes(x = November.2010, y = November.2016, color = State)) +
  geom_point() +
  labs(title = "East Coast November 2010 and November 2016 with Outliers", x = "November.2010 Rent Prices", y = "November.2016 Rent Prices")
ggplot(eastcoast_annualClean, aes(x = November.2010, y = November.2016, color = State)) +
  geom_point() +
  labs(title = "East Coast November 2010 and November 2016 without Outliers", x = "November.2010 Rent Prices", y = "November.2016 Rent Prices")
## QQ Plots without Outliers
# Normal QQ plot plus reference line for each coast and year, cleaned data.
qqnorm(westcoast_annualClean[["November.2010"]], main = "QQ-Plot West Coast November 2010 without Outliers")
qqline(westcoast_annualClean[["November.2010"]])
qqnorm(westcoast_annualClean[["November.2016"]], main = "QQ-Plot West Coast November 2016 without Outliers")
qqline(westcoast_annualClean[["November.2016"]])
qqnorm(eastcoast_annualClean[["November.2010"]], main = "QQ-Plot East Coast November 2010 without Outliers")
qqline(eastcoast_annualClean[["November.2010"]])
qqnorm(eastcoast_annualClean[["November.2016"]], main = "QQ-Plot East Coast November 2016 without Outliers")
qqline(eastcoast_annualClean[["November.2016"]])
## QQ Plots with Outliers
# The same four plots on the uncleaned tables.
qqnorm(westcoast_annual[["November.2010"]], main = "QQ-Plot West Coast November 2010 with Outliers")
qqline(westcoast_annual[["November.2010"]])
qqnorm(westcoast_annual[["November.2016"]], main = "QQ-Plot West Coast November 2016 with Outliers")
qqline(westcoast_annual[["November.2016"]])
qqnorm(eastcoast_annual[["November.2010"]], main = "QQ-Plot East Coast November 2010 with Outliers")
qqline(eastcoast_annual[["November.2010"]])
qqnorm(eastcoast_annual[["November.2016"]], main = "QQ-Plot East Coast November 2016 with Outliers")
qqline(eastcoast_annual[["November.2016"]])
# Per-date average, maximum and minimum price across all cities on each coast.
# At this point westAnn/eastAnn hold a Date column followed by one column per
# city, so the city columns are "everything after the first column". The range
# is derived from the table instead of being hard-coded to the row counts of
# one particular input file, and is captured before the summary columns are
# appended.
# NOTE(review): rowMaxs()/rowMins() are presumably from matrixStats - the
# package load is not visible in this section.
westAnn$Date <- gsub('\\.', ' ', westAnn$Date)
west_city_cols <- seq_len(ncol(westAnn))[-1]
westAnn <- westAnn %>%
  mutate(Avg.Price = rowMeans(westAnn[west_city_cols]))
westAnn$Max <- rowMaxs(as.matrix(westAnn[west_city_cols]))
westAnn$Min <- rowMins(as.matrix(westAnn[west_city_cols]))
ggplot(westAnn, aes(Date, Avg.Price)) + geom_point() + labs(title="Average Price of the Years on the West Coast")

eastAnn$Date <- gsub('\\.', ' ', eastAnn$Date)
east_city_cols <- seq_len(ncol(eastAnn))[-1]
eastAnn <- eastAnn %>%
  mutate(Avg.Price = rowMeans(eastAnn[east_city_cols]))
eastAnn$Max <- rowMaxs(as.matrix(eastAnn[east_city_cols]))
eastAnn$Min <- rowMins(as.matrix(eastAnn[east_city_cols]))
ggplot(eastAnn, aes(Date, Avg.Price)) + geom_point() + labs(title="Average Price of the Years on the East Coast")

# Compact tables holding only the date and the three summary columns.
westAnnAvg <- westAnn[c("Date", "Avg.Price", "Max", "Min")]
eastAnnAvg <- eastAnn[c("Date", "Avg.Price", "Max", "Min")]
loadPkg("BSDA") # for z.test
# One-sample t-tests on the outlier-cleaned prices. Each call is used for its
# confidence interval for the mean (80% and 99%), per coast and per year.
# Lines starting with `##` are the console output recorded for the statement
# above them; they are kept as comments so the script still parses.
west2010_80 <- t.test(x = westcoast_annualClean$November.2010, conf.level = 0.80)
west2010_80
## One Sample t-test
## data: westcoast_annualClean$November.2010 t = 106, df = 1156, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 80 percent confidence interval: 1515 1553 sample estimates: mean of x 1534
west2016_80 <- t.test(x = westcoast_annualClean$November.2016, conf.level = 0.80)
west2016_80
## One Sample t-test
## data: westcoast_annualClean$November.2016 t = 90, df = 1150, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 80 percent confidence interval: 1798 1849 sample estimates: mean of x 1823
west2010_99 <- t.test(x = westcoast_annualClean$November.2010, conf.level = 0.99)
west2010_99
## One Sample t-test
## data: westcoast_annualClean$November.2010 t = 106, df = 1156, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 99 percent confidence interval: 1497 1571 sample estimates: mean of x 1534
west2016_99 <- t.test(x = westcoast_annualClean$November.2016, conf.level = 0.99)
west2016_99
## One Sample t-test
## data: westcoast_annualClean$November.2016 t = 90, df = 1150, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 99 percent confidence interval: 1771 1876 sample estimates: mean of x 1823
east2010_80 <- t.test(x = eastcoast_annualClean$November.2010, conf.level = 0.80)
east2010_80
## One Sample t-test
## data: eastcoast_annualClean$November.2010 t = 220, df = 3816, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 80 percent confidence interval: 1362 1378 sample estimates: mean of x 1370
east2016_80 <- t.test(x = eastcoast_annualClean$November.2016, conf.level = 0.80)
east2016_80
## One Sample t-test
## data: eastcoast_annualClean$November.2016 t = 204, df = 3825, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 80 percent confidence interval: 1489 1508 sample estimates: mean of x 1498
east2010_99 <- t.test(x = eastcoast_annualClean$November.2010, conf.level = 0.99)
east2010_99
## One Sample t-test
## data: eastcoast_annualClean$November.2010 t = 220, df = 3816, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 99 percent confidence interval: 1354 1386 sample estimates: mean of x 1370
east2016_99 <- t.test(x = eastcoast_annualClean$November.2016, conf.level = 0.99)
east2016_99
## One Sample t-test
## data: eastcoast_annualClean$November.2016 t = 204, df = 3825, p-value <2e-16 alternative hypothesis: true mean is not equal to 0 99 percent confidence interval: 1480 1517 sample estimates: mean of x 1498
# ======================== Start of Cities of Interest File ========================
# Reload the price table and keep only the rows for the cities of interest.
# Columns 1 and 3-6 are dropped in the same step, which leaves column 2 (City)
# followed by the monthly price columns.
zillow_prices <- read.csv("price.csv")
zillow_focus <- zillow_prices[c(1:5, 7, 13, 17, 23, 24, 26, 113, 162), -c(1, 3:6)]
zillow_focus
Our cities of interest are NYC (Queens; data is only available from December 2011 onwards), LA, Philadelphia, Houston (Harris), Chicago (Cook), Dallas, Las Vegas, San Francisco, Detroit (Wayne), Roanoke, Richmond, the District of Columbia, Baltimore, and Seattle (King). Here, we clean the data set and transpose it so that the dates become rows and the cities become columns.
# Use the city names as row names, drop the City column, and transpose so each
# row is a month and each column is a city.
rownames(zillow_focus) <- zillow_focus$City
zillow_focus <- as.data.frame(t(zillow_focus[-1]))
# Strip spaces and commas from the city names now used as column names.
names(zillow_focus) <- str_replace_all(names(zillow_focus), c(" " = "" , "," = ""))
head(zillow_focus)
## NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## November.2010 NA 2184 1563 1198 1092 1188
## December.2010 NA 2184 1555 1199 1099 1183
## January.2011 NA 2183 1547 1199 1094 1178
## February.2011 NA 2188 1537 1200 1087 1177
## March.2011 NA 2189 1526 1203 1080 1178
## April.2011 NA 2189 1517 1205 1080 1179
## SanFrancisco Detroit Seattle Baltimore Washington Richmond
## November.2010 3188 847 1746 1192 2174 892
## December.2010 3207 844 1740 1195 2203 898
## January.2011 3189 832 1736 1203 2241 900
## February.2011 3127 820 1734 1210 2276 902
## March.2011 3040 810 1730 1220 2303 902
## April.2011 2970 806 1722 1228 2320 904
## Roanoke
## November.2010 1065
## December.2010 1067
## January.2011 1062
## February.2011 1060
## March.2011 1059
## April.2011 1066
# Inspect the column types of the transposed table.
str(zillow_focus)
## 'data.frame': 75 obs. of 13 variables:
## $ NewYork : int NA NA NA NA NA NA NA NA NA NA ...
## $ LosAngeles : int 2184 2184 2183 2188 2189 2189 2188 2191 2189 2186 ...
## $ Chicago : int 1563 1555 1547 1537 1526 1517 1507 1497 1493 1491 ...
## $ Houston : int 1198 1199 1199 1200 1203 1205 1204 1199 1194 1190 ...
## $ Philadelphia: int 1092 1099 1094 1087 1080 1080 1083 1082 1082 1085 ...
## $ LasVegas : int 1188 1183 1178 1177 1178 1179 1176 1170 1163 1158 ...
## $ SanFrancisco: int 3188 3207 3189 3127 3040 2970 2916 2879 2848 2848 ...
## $ Detroit : int 847 844 832 820 810 806 802 800 797 796 ...
## $ Seattle : int 1746 1740 1736 1734 1730 1722 1713 1709 1708 1706 ...
## $ Baltimore : int 1192 1195 1203 1210 1220 1228 1233 1230 1223 1214 ...
## $ Washington : int 2174 2203 2241 2276 2303 2320 2323 2318 2318 2322 ...
## $ Richmond : int 892 898 900 902 902 904 909 917 933 951 ...
## $ Roanoke : int 1065 1067 1062 1060 1059 1066 1070 1070 1062 1049 ...
Turn date into a column
# Move the month labels out of the row names into a leading Date column, then
# renumber the rows 1..n. seq_len() stays valid for a zero-row table, unlike
# 1:nrow().
zillow_focus <- cbind(Date = rownames(zillow_focus), zillow_focus)
rownames(zillow_focus) <- seq_len(nrow(zillow_focus))
head(zillow_focus)
## Date NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## 1 November.2010 NA 2184 1563 1198 1092 1188
## 2 December.2010 NA 2184 1555 1199 1099 1183
## 3 January.2011 NA 2183 1547 1199 1094 1178
## 4 February.2011 NA 2188 1537 1200 1087 1177
## 5 March.2011 NA 2189 1526 1203 1080 1178
## 6 April.2011 NA 2189 1517 1205 1080 1179
## SanFrancisco Detroit Seattle Baltimore Washington Richmond Roanoke
## 1 3188 847 1746 1192 2174 892 1065
## 2 3207 844 1740 1195 2203 898 1067
## 3 3189 832 1736 1203 2241 900 1062
## 4 3127 820 1734 1210 2276 902 1060
## 5 3040 810 1730 1220 2303 902 1059
## 6 2970 806 1722 1228 2320 904 1066
Here, we convert the date strings (e.g. "November.2010") to Date objects by rewriting them in %d/%m/%Y form, with the day fixed at 01.
# Work on a copy so the helper Month/Year columns do not end up in
# zillow_focus itself.
zillow_focus_test <- zillow_focus
# "November.2010" -> "November 2010"
zillow_focus_test$Date <- str_replace_all(zillow_focus_test$Date, "[.]", " ")
# First word is the month name, last word is the year.
zillow_focus_test$Month <- str_extract(zillow_focus_test$Date, "(\\w+)")
zillow_focus_test$Year <- str_extract(zillow_focus_test$Date, "\\w+$")
# Build "01/<month number>/<year>"; month.name maps the English month name to
# its number.
tmp_date <- paste(
  "01",
  match(zillow_focus_test$Month, month.name),
  zillow_focus_test$Year,
  sep = "/"
)
tmp_date <- as.Date(tmp_date, "%d/%m/%Y")
zillow_focus$Date <- tmp_date
head(zillow_focus)
## Date NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## 1 2010-11-01 NA 2184 1563 1198 1092 1188
## 2 2010-12-01 NA 2184 1555 1199 1099 1183
## 3 2011-01-01 NA 2183 1547 1199 1094 1178
## 4 2011-02-01 NA 2188 1537 1200 1087 1177
## 5 2011-03-01 NA 2189 1526 1203 1080 1178
## 6 2011-04-01 NA 2189 1517 1205 1080 1179
## SanFrancisco Detroit Seattle Baltimore Washington Richmond Roanoke
## 1 3188 847 1746 1192 2174 892 1065
## 2 3207 844 1740 1195 2203 898 1067
## 3 3189 832 1736 1203 2241 900 1062
## 4 3127 820 1734 1210 2276 902 1060
## 5 3040 810 1730 1220 2303 902 1059
## 6 2970 806 1722 1228 2320 904 1066
# Check the structure again now that Date has been converted.
str(zillow_focus)
## 'data.frame': 75 obs. of 14 variables:
## $ Date : Date, format: "2010-11-01" "2010-12-01" ...
## $ NewYork : int NA NA NA NA NA NA NA NA NA NA ...
## $ LosAngeles : int 2184 2184 2183 2188 2189 2189 2188 2191 2189 2186 ...
## $ Chicago : int 1563 1555 1547 1537 1526 1517 1507 1497 1493 1491 ...
## $ Houston : int 1198 1199 1199 1200 1203 1205 1204 1199 1194 1190 ...
## $ Philadelphia: int 1092 1099 1094 1087 1080 1080 1083 1082 1082 1085 ...
## $ LasVegas : int 1188 1183 1178 1177 1178 1179 1176 1170 1163 1158 ...
## $ SanFrancisco: int 3188 3207 3189 3127 3040 2970 2916 2879 2848 2848 ...
## $ Detroit : int 847 844 832 820 810 806 802 800 797 796 ...
## $ Seattle : int 1746 1740 1736 1734 1730 1722 1713 1709 1708 1706 ...
## $ Baltimore : int 1192 1195 1203 1210 1220 1228 1233 1230 1223 1214 ...
## $ Washington : int 2174 2203 2241 2276 2303 2320 2323 2318 2318 2322 ...
## $ Richmond : int 892 898 900 902 902 904 909 917 933 951 ...
## $ Roanoke : int 1065 1067 1062 1060 1059 1066 1070 1070 1062 1049 ...
Add a total column, not including New York (which has NAs)
# Copy the table and add a Total column: the row sum of columns 3-14, i.e.
# every city except NewYork (column 2).
zillow_total <- zillow_focus
zillow_total[["Total"]] <- rowSums(zillow_total[, 3:14])
head(zillow_total)
## Date NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## 1 2010-11-01 NA 2184 1563 1198 1092 1188
## 2 2010-12-01 NA 2184 1555 1199 1099 1183
## 3 2011-01-01 NA 2183 1547 1199 1094 1178
## 4 2011-02-01 NA 2188 1537 1200 1087 1177
## 5 2011-03-01 NA 2189 1526 1203 1080 1178
## 6 2011-04-01 NA 2189 1517 1205 1080 1179
## SanFrancisco Detroit Seattle Baltimore Washington Richmond Roanoke Total
## 1 3188 847 1746 1192 2174 892 1065 18329
## 2 3207 844 1740 1195 2203 898 1067 18374
## 3 3189 832 1736 1203 2241 900 1062 18364
## 4 3127 820 1734 1210 2276 902 1060 18318
## 5 3040 810 1730 1220 2303 902 1059 18240
## 6 2970 806 1722 1228 2320 904 1066 18186
Line plots of the time series. This first one plots only the total against time, without splitting by city.
# Total rent (summed over the cities) against time, as points joined by a line.
ggplot(zillow_total, aes(x = Date, y = Total)) +
  geom_point() +
  geom_line() +
  labs(x = "Time", y = "Total Rent Prices", title = "Plot of City Rent Change over Time")
Melt the data frame into long format by city, so that ggplot can group the prices by city
# Long format: one row per (Date, city) pair. The second and third columns
# produced by melt() are renamed to City and Prices.
zillow_melt <- melt(zillow_focus, id = "Date")
names(zillow_melt)[2:3] <- c("City", "Prices")
head(zillow_melt)
## Date City Prices
## 1 2010-11-01 NewYork NA
## 2 2010-12-01 NewYork NA
## 3 2011-01-01 NewYork NA
## 4 2011-02-01 NewYork NA
## 5 2011-03-01 NewYork NA
## 6 2011-04-01 NewYork NA
I’m not sure if histograms would be meaningful in the context of my part, but here’s one of all the cities melted together, ignoring New York again. We can do more if you guys believe it’s necessary. There is a huge skew because the bigger/more expensive cities chosen would have higher rent prices due to a difference in the cost of living. I’ll also do the Mean/Median/Quartiles of the overall plots here.
# Distribution of all city prices pooled together. na.rm is a layer argument,
# not an aesthetic: inside aes() it is silently ignored, so it is passed to
# the geoms, where it drops the missing prices without a warning.
ggplot(zillow_melt, aes(x = Prices)) +
  geom_histogram(binwidth = 300, na.rm = TRUE)
ggplot(zillow_melt, aes(x = Prices)) +
  geom_boxplot(na.rm = TRUE)
summary(zillow_melt$Prices)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 743 1106 1319 1671 2210 4547 13
Line plot of each city's rent prices over time.
# One line per city. na.rm is a layer argument, not an aesthetic, so it is
# passed to the geoms instead of aes(), where it had no effect.
ggplot(zillow_melt, aes(x = Date, y = Prices, group = City, color = City)) +
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(x = "Time", y = "Rent Price", title = "Plot of City Rent Prices over Time")
Find the first-order change, i.e. the difference between consecutive months
# Month-over-month difference for every city column (everything except Date).
# diff() returns one value fewer than it is given, so the dates are aligned by
# dropping the first one. "All but the first" indexing replaces ranges that
# were hard-coded to 14 columns and 75 rows.
zillow_change <- as.data.frame(lapply(zillow_focus[-1], diff, lag = 1))
zillow_change$Date <- zillow_focus$Date[-1]
# Put Date back in front.
zillow_change <- zillow_change %>% dplyr::select(Date, everything())
head(zillow_change)
## Date NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## 1 2010-12-01 NA 0 -8 1 7 -5
## 2 2011-01-01 NA -1 -8 0 -5 -5
## 3 2011-02-01 NA 5 -10 1 -7 -1
## 4 2011-03-01 NA 1 -11 3 -7 1
## 5 2011-04-01 NA 0 -9 2 0 1
## 6 2011-05-01 NA -1 -10 -1 3 -3
## SanFrancisco Detroit Seattle Baltimore Washington Richmond Roanoke
## 1 19 -3 -6 3 29 6 2
## 2 -18 -12 -4 8 38 2 -5
## 3 -62 -12 -2 7 35 2 -2
## 4 -87 -10 -4 10 27 0 -1
## 5 -70 -4 -8 8 17 2 7
## 6 -54 -4 -9 5 3 5 4
Melt, and line graph of the changes for each city. The graph is kind of hard to visualize, but it looks like San Francisco had the highest change from time to time.
# Long format of the monthly changes, then one line per city. na.rm is passed
# to the geoms; inside aes() it is treated as an unused aesthetic.
zillow_cmelt <- melt(zillow_change, id = "Date")
names(zillow_cmelt)[2:3] <- c("City", "PriceChange")
ggplot(zillow_cmelt, aes(x = Date, y = PriceChange, group = City, color = City)) +
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(x = "Time", y = "Price Change", title = "Plot of City Rent Change over Time")
Cumulative change, to make the trends clearer with respect to each other
# Running total of the monthly changes for every city column (everything
# except Date). "All but the first" indexing replaces ranges that were
# hard-coded to 14 columns and 75 rows.
zillow_cumulative <- as.data.frame(cumsum(zillow_change[-1]))
zillow_cumulative$Date <- zillow_focus$Date[-1]
# Put Date back in front.
zillow_cumulative <- zillow_cumulative %>% dplyr::select(Date, everything())
head(zillow_cumulative)
## Date NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## 1 2010-12-01 NA 0 -8 1 7 -5
## 2 2011-01-01 NA -1 -16 1 2 -10
## 3 2011-02-01 NA 4 -26 2 -5 -11
## 4 2011-03-01 NA 5 -37 5 -12 -10
## 5 2011-04-01 NA 5 -46 7 -12 -9
## 6 2011-05-01 NA 4 -56 6 -9 -12
## SanFrancisco Detroit Seattle Baltimore Washington Richmond Roanoke
## 1 19 -3 -6 3 29 6 2
## 2 1 -15 -10 11 67 8 -3
## 3 -61 -27 -12 18 102 10 -5
## 4 -148 -37 -16 28 129 10 -6
## 5 -218 -41 -24 36 146 12 1
## 6 -272 -45 -33 41 149 17 5
Another graph, this time of cumulative change. San Francisco had the greatest change, while Seattle had the second largest. Detroit had the most negative change, falling below 0 overall.
# Long format of the cumulative changes, then one line per city. na.rm is
# passed to the geoms; inside aes() it is treated as an unused aesthetic.
zillow_cumumelt <- melt(zillow_cumulative, id = "Date")
names(zillow_cumumelt)[2:3] <- c("City", "CumulativePriceChange")
ggplot(zillow_cumumelt, aes(x = Date, y = CumulativePriceChange, group = City, color = City)) +
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(x = "Time", y = "Cumulative Change", title = "Plot of Cumulative City Rent Change over Time")
Find the first order change as a Percent
# Monthly change expressed as a percentage of the price. The city columns of
# zillow_change are divided by the matching city prices from the second month
# onward. "All but the first" indexing replaces ranges that were hard-coded to
# 14 columns and 75 rows.
# NOTE(review): the denominator is the price of the *current* month (rows 2..n),
# not the previous month (rows 1..n-1) that a conventional percent change
# uses. It is left as is because the outputs quoted in this report were
# produced with this definition - confirm which one is intended.
zillow_proportion <- zillow_change[-1] / zillow_focus[-1, -1] * 100
zillow_proportion$Date <- zillow_focus$Date[-1]
# Put Date back in front.
zillow_proportion <- zillow_proportion %>% dplyr::select(Date, everything())
head(zillow_proportion)
## Date NewYork LosAngeles Chicago Houston Philadelphia LasVegas
## 1 2010-12-01 NA 0.0000 -0.514 0.0834 0.637 -0.4227
## 2 2011-01-01 NA -0.0458 -0.517 0.0000 -0.457 -0.4244
## 3 2011-02-01 NA 0.2285 -0.651 0.0833 -0.644 -0.0850
## 4 2011-03-01 NA 0.0457 -0.721 0.2494 -0.648 0.0849
## 5 2011-04-01 NA 0.0000 -0.593 0.1660 0.000 0.0848
## 6 2011-05-01 NA -0.0457 -0.664 -0.0831 0.277 -0.2551
## SanFrancisco Detroit Seattle Baltimore Washington Richmond Roanoke
## 1 0.592 -0.355 -0.345 0.251 1.316 0.668 0.1874
## 2 -0.564 -1.442 -0.230 0.665 1.696 0.222 -0.4708
## 3 -1.983 -1.463 -0.115 0.579 1.538 0.222 -0.1887
## 4 -2.862 -1.235 -0.231 0.820 1.172 0.000 -0.0944
## 5 -2.357 -0.496 -0.465 0.651 0.733 0.221 0.6567
## 6 -1.852 -0.499 -0.525 0.406 0.129 0.550 0.3738
It looks like Richmond has a pretty varied proportional change. It’s still hard to make anything out of it, so let’s move to the cumulative proportional change again.
# Long format of the percentage changes, then one line per city. na.rm is
# passed to the geoms; inside aes() it is treated as an unused aesthetic.
zillow_pmelt <- melt(zillow_proportion, id = "Date")
names(zillow_pmelt)[2:3] <- c("City", "ProportionChange")
ggplot(zillow_pmelt, aes(x = Date, y = ProportionChange, group = City, color = City)) +
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(x = "Time", y = "Proportional Change", title = "Plot of City Rent Change over Time")
Quick histogram of the distribution of proportional changes. The proportional changes over time look pretty normal taken together as a whole, with maybe a slight right skew, which makes sense, as we would guess that rent prices should generally increase over time with inflation and whatnot. Remember, these changes are in percentages. There are outliers on both sides of the boxplot, which matches what we see in the histogram. Here are the measures of variance and the qqnorm plot as well.
# Distribution of the pooled percentage changes: histogram, boxplot, normal QQ
# plot and summary statistics. na.rm is a layer argument, not an aesthetic, so
# it is passed to the geoms instead of aes(), where it had no effect.
ggplot(zillow_pmelt, aes(x = ProportionChange)) +
  geom_histogram(na.rm = TRUE) +
  labs(x = "Proportional Change in Percentages")
ggplot(zillow_pmelt, aes(x = ProportionChange)) +
  geom_boxplot(na.rm = TRUE)
qqnorm(zillow_pmelt$ProportionChange)
summary(zillow_pmelt$ProportionChange)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -2.90 -0.13 0.18 0.20 0.54 2.45 13
var(zillow_pmelt$ProportionChange, na.rm = TRUE)
## [1] 0.362
sd(zillow_pmelt$ProportionChange, na.rm = TRUE)
## [1] 0.602
Just making the cumulative melt for proportions now. We will drop New York before plotting the values, as there are too many NAs.
# Drop the first two columns (Date and NewYork) and accumulate the percentage
# changes of the remaining cities. Index by "all but" rather than by ranges
# hard-coded to 14 columns and 75 rows.
zillow_pdrop <- zillow_proportion[-(1:2)]
zillow_pcumulative <- as.data.frame(cumsum(zillow_pdrop))
zillow_pcumulative$Date <- zillow_focus$Date[-1]
# Put Date back in front.
zillow_pcumulative <- zillow_pcumulative %>% dplyr::select(Date, everything())
head(zillow_pcumulative)
## Date LosAngeles Chicago Houston Philadelphia LasVegas SanFrancisco
## 1 2010-12-01 0.0000 -0.514 0.0834 0.637 -0.423 0.592
## 2 2011-01-01 -0.0458 -1.032 0.0834 0.180 -0.847 0.028
## 3 2011-02-01 0.1827 -1.682 0.1667 -0.464 -0.932 -1.955
## 4 2011-03-01 0.2284 -2.403 0.4161 -1.112 -0.847 -4.817
## 5 2011-04-01 0.2284 -2.996 0.5821 -1.112 -0.762 -7.173
## 6 2011-05-01 0.1827 -3.660 0.4990 -0.835 -1.017 -9.025
## Detroit Seattle Baltimore Washington Richmond Roanoke
## 1 -0.355 -0.345 0.251 1.32 0.668 0.1874
## 2 -1.798 -0.575 0.916 3.01 0.890 -0.2834
## 3 -3.261 -0.691 1.495 4.55 1.112 -0.4720
## 4 -4.496 -0.922 2.314 5.72 1.112 -0.5665
## 5 -4.992 -1.386 2.966 6.45 1.333 0.0902
## 6 -5.491 -1.912 3.371 6.58 1.883 0.4640
We will drop New York before plotting the values, as there are too many NAs. Now, it looks like the proportional change of Seattle is a bit higher than San Francisco, and Richmond is up there too. Detroit still has the most negative proportional change overall.
# Long format of the cumulative percentage changes, then one line per city.
# na.rm is passed to the geoms; inside aes() it is an unused aesthetic.
zillow_pcumumelt <- melt(zillow_pcumulative, id = "Date")
names(zillow_pcumumelt)[2:3] <- c("City", "CumulativeProportionChange")
ggplot(zillow_pcumumelt, aes(x = Date, y = CumulativeProportionChange, group = City, color = City)) +
  geom_point(na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  labs(x = "Time", y = "Cumu. Prop. Change", title = "Plot of Cumulative City Rent Change over Time")
Box plot of each city’s proportional change. As expected, San Francisco and Roanoke seem to have the largest ranges and variances. We are also dropping New York again.
# Drop column 2 (NewYork) but keep Date, melt to long format, and draw one
# boxplot of percentage change per city. na.rm is passed to the geom; inside
# aes() it is an unused aesthetic.
zillow_prodrop <- zillow_proportion[-2]
zillow_pdmelt <- melt(zillow_prodrop, id = "Date")
names(zillow_pdmelt)[2:3] <- c("City", "ProportionChange")
ggplot(zillow_pdmelt, aes(x = City, y = ProportionChange, color = City)) +
  geom_boxplot(outlier.shape = 8, outlier.size = 4, na.rm = TRUE)
We will now run a one-way ANOVA on the proportional changes, grouped by city, to see whether the average monthly change differs between cities. Since we have a p-value less than our alpha, there are significant differences in changes, and thus we look at the post-hoc Tukey HSD. It looks like the pairs that have significantly different average proportional changes are [Detroit-LosAngeles], [Roanoke-LosAngeles], [SanFrancisco-Chicago], [Seattle-Chicago], [Richmond-Chicago], [Detroit-Houston], [Seattle-Philadelphia], [SanFrancisco-LasVegas], [Seattle-LasVegas], [Richmond-LasVegas], [Detroit-SanFrancisco], [Roanoke-SanFrancisco], [Seattle-Detroit], [Washington-Detroit], [Richmond-Detroit], [Baltimore-Seattle], [Roanoke-Seattle], and finally, [Roanoke-Richmond].
# One-way ANOVA of ProportionChange across the City groups. The formula keeps
# the `zillow_pdmelt$` prefixes so the printed term labels stay the same.
pm_anova <- aov(
  formula = zillow_pdmelt$ProportionChange ~ zillow_pdmelt$City
)
summary(object = pm_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## zillow_pdmelt$City 11 31.4 2.855 8.52 2.1e-14 ***
## Residuals 876 293.6 0.335
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc pairwise comparison of the city means from the fitted ANOVA.
tukeyAoV <- TukeyHSD(x = pm_anova)
print(tukeyAoV)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = zillow_pdmelt$ProportionChange ~ zillow_pdmelt$City)
##
## $`zillow_pdmelt$City`
## diff lwr upr p adj
## Chicago-LosAngeles -0.25457 -0.5665 0.05734 0.241
## Houston-LosAngeles -0.07357 -0.3855 0.23834 1.000
## Philadelphia-LosAngeles -0.17203 -0.4839 0.13988 0.814
## LasVegas-LosAngeles -0.25543 -0.5673 0.05648 0.236
## SanFrancisco-LosAngeles 0.11176 -0.2001 0.42366 0.991
## Detroit-LosAngeles -0.47720 -0.7891 -0.16529 0.000
## Seattle-LosAngeles 0.16025 -0.1517 0.47215 0.876
## Baltimore-LosAngeles -0.18919 -0.5011 0.12272 0.702
## Washington-LosAngeles -0.08766 -0.3996 0.22424 0.999
## Richmond-LosAngeles 0.08187 -0.2300 0.39377 0.999
## Roanoke-LosAngeles -0.37085 -0.6828 -0.05895 0.006
## Houston-Chicago 0.18100 -0.1309 0.49291 0.758
## Philadelphia-Chicago 0.08254 -0.2294 0.39445 0.999
## LasVegas-Chicago -0.00086 -0.3128 0.31104 1.000
## SanFrancisco-Chicago 0.36633 0.0544 0.67823 0.007
## Detroit-Chicago -0.22263 -0.5345 0.08928 0.450
## Seattle-Chicago 0.41482 0.1029 0.72672 0.001
## Baltimore-Chicago 0.06538 -0.2465 0.37729 1.000
## Washington-Chicago 0.16691 -0.1450 0.47881 0.842
## Richmond-Chicago 0.33644 0.0245 0.64834 0.022
## Roanoke-Chicago -0.11628 -0.4282 0.19562 0.987
## Philadelphia-Houston -0.09846 -0.4104 0.21344 0.997
## LasVegas-Houston -0.18186 -0.4938 0.13004 0.752
## SanFrancisco-Houston 0.18533 -0.1266 0.49723 0.729
## Detroit-Houston -0.40363 -0.7155 -0.09172 0.001
## Seattle-Houston 0.23381 -0.0781 0.54572 0.369
## Baltimore-Houston -0.11562 -0.4275 0.19628 0.988
## Washington-Houston -0.01409 -0.3260 0.29781 1.000
## Richmond-Houston 0.15544 -0.1565 0.46734 0.897
## Roanoke-Houston -0.29728 -0.6092 0.01462 0.079
## LasVegas-Philadelphia -0.08340 -0.3953 0.22850 0.999
## SanFrancisco-Philadelphia 0.28379 -0.0281 0.59569 0.116
## Detroit-Philadelphia -0.30517 -0.6171 0.00673 0.062
## Seattle-Philadelphia 0.33227 0.0204 0.64418 0.025
## Baltimore-Philadelphia -0.01716 -0.3291 0.29474 1.000
## Washington-Philadelphia 0.08437 -0.2275 0.39627 0.999
## Richmond-Philadelphia 0.25390 -0.0580 0.56580 0.244
## Roanoke-Philadelphia -0.19882 -0.5107 0.11308 0.631
## SanFrancisco-LasVegas 0.36719 0.0553 0.67909 0.007
## Detroit-LasVegas -0.22177 -0.5337 0.09014 0.456
## Seattle-LasVegas 0.41568 0.1038 0.72758 0.001
## Baltimore-LasVegas 0.06624 -0.2457 0.37815 1.000
## Washington-LasVegas 0.16777 -0.1441 0.47967 0.838
## Richmond-LasVegas 0.33730 0.0254 0.64920 0.021
## Roanoke-LasVegas -0.11542 -0.4273 0.19648 0.988
## Detroit-SanFrancisco -0.58895 -0.9009 -0.27705 0.000
## Seattle-SanFrancisco 0.04849 -0.2634 0.36039 1.000
## Baltimore-SanFrancisco -0.30095 -0.6129 0.01096 0.070
## Washington-SanFrancisco -0.19942 -0.5113 0.11248 0.626
## Richmond-SanFrancisco -0.02989 -0.3418 0.28202 1.000
## Roanoke-SanFrancisco -0.48261 -0.7945 -0.17070 0.000
## Seattle-Detroit 0.63744 0.3255 0.94935 0.000
## Baltimore-Detroit 0.28801 -0.0239 0.59991 0.103
## Washington-Detroit 0.38953 0.0776 0.70144 0.003
## Richmond-Detroit 0.55907 0.2472 0.87097 0.000
## Roanoke-Detroit 0.10635 -0.2056 0.41825 0.994
## Baltimore-Seattle -0.34943 -0.6613 -0.03753 0.014
## Washington-Seattle -0.24791 -0.5598 0.06400 0.279
## Richmond-Seattle -0.07838 -0.3903 0.23353 1.000
## Roanoke-Seattle -0.53110 -0.8430 -0.21919 0.000
## Washington-Baltimore 0.10153 -0.2104 0.41343 0.996
## Richmond-Baltimore 0.27106 -0.0408 0.58296 0.162
## Roanoke-Baltimore -0.18166 -0.4936 0.13024 0.754
## Richmond-Washington 0.16953 -0.1424 0.48144 0.828
## Roanoke-Washington -0.28319 -0.5951 0.02872 0.118
## Roanoke-Richmond -0.45272 -0.7646 -0.14082 0.000
# Bar chart of the number of rows in `zillow` per State.
ggplot(data = zillow, mapping = aes(x = State)) +
  geom_bar(fill = "purple", colour = "blue", alpha = 0.6)
# Time-series plot of the price columns (7:81) for the first `num_city` rows.
num_city <- 10
values <- head(zillow, n = num_city)
# Transpose so each column is one location and each row is one month.
values <- data.frame(t(as.matrix(values[, 7:81])))
colnames(values) <- zillow[seq_len(num_city), 2]
# Monthly index from November 2010 to January 2017, one entry per price column.
date <- as.yearmon(
  seq(from = as.Date("2010/11/01"), to = as.Date("2017/01/31"), by = "month")
)
ts <- zoo(values, order.by = date)
values <- fortify(ts)
values[["Index"]] <- as.Date(values[["Index"]])
# facets = NULL draws every series in a single panel.
autoplot(ts, facets = NULL) +
  geom_point(size = 0.5) +
  labs(x = "Time", y = "Price") +
  theme_minimal()
# Long format: one row per original row and month, with the month column name
# in `Month` (a factor, in column order) and the value in `Price`.
price <- gather(data = zillow, "Month", "Price", 7:81, factor_key = TRUE)

# Box plot of Price per Metro area for a single state.
#   price_long: long-format data with State, Metro and Price columns
#   state:      State code to keep (e.g. "CA")
#   fill:       box fill colour
#   colour:     box outline colour
plot_metro_rent_boxplot <- function(price_long, state, fill, colour) {
  in_state <- price_long$State == state & !is.na(price_long$Metro)
  ggplot(price_long[in_state, ]) +
    geom_boxplot(aes(x = fct_rev(Metro), y = as.numeric(Price)),
                 fill = fill, color = colour, outlier.size = 0.5) +
    # Labels are attached to aesthetics, so they follow the data when
    # coord_flip() swaps the displayed axes (Metro ends up on the vertical).
    labs(x = "Metro Area", y = "Price") +
    coord_flip()
}

plot_metro_rent_boxplot(price, "CA", fill = "#FF9999", colour = "#56B4E9")
plot_metro_rent_boxplot(price, "PA", fill = "#9999CC", colour = "#66CC99")
# price
# Prices for rows whose County is "Dallas": one row of points per City, each
# point coloured by its Month.
# NOTE(review): the filter matches on the county name only - confirm it cannot
# pick up same-named counties in more than one state.
ggplot(data = price[price$County == "Dallas" & !is.na(price$City), ],
       mapping = aes(x = Price, y = fct_rev(City))) +
  geom_point(aes(colour = Month), shape = 20, size = 3, alpha = 0.4) +
  scale_color_discrete(l = 45, h = c(30, 330)) +
  labs(title = "Zillow | Dallas County Rent Prices",
       subtitle = "2010 - 2017",
       x = "Price",
       y = "Dallas County",
       colour = "Time")
# Prices for rows whose County is "Los Angeles": one row of points per City,
# each point coloured by its Month on the discrete viridis scale.
ggplot(data = price[price$County == "Los Angeles" & !is.na(price$City), ],
       mapping = aes(x = Price, y = fct_rev(City))) +
  geom_point(aes(colour = Month), shape = 16, size = 3, alpha = 0.4) +
  scale_colour_viridis_d() +
  labs(title = "Zillow | Los Angeles County Rent Prices",
       subtitle = "2010 - 2017",
       x = "Price",
       y = "LA County",
       colour = "Time")
# scale_colour_gradientn(colours=rainbow(4))
# Create variable of numeric year: the last four characters of each Month
# label (labels look like "November.2010").
month_label <- as.character(price$Month)
price$Year <- as.numeric(substring(month_label, nchar(month_label) - 3))
# Calculate the mean (rounded to a whole number), minimum and maximum price
# for each state, by year. summarise() drops only the last grouping level, so
# `states` is still grouped by State afterwards.
states <- price[!is.na(price$State), ] %>%
  group_by(State, Year) %>%
  summarise(Mean = round(mean(Price), 0),
            Min = min(Price),
            Max = max(Price))
# Plot change over time, by state: one point per (State, Year) mean, coloured
# by Year on a black-to-purple gradient. coord_flip() puts the states on the
# vertical axis.
ggplot(states[!is.na(states$Mean), ],
       aes(x = fct_rev(State),
           y = as.numeric(Mean))) +
  labs(title = "Zillow | Mean US Rent Prices",
       subtitle = "2010 - 2017",
       x = "States", y = "Average Price",
       colour = "Year") +
  geom_point(shape = 20, alpha = 0.8, size = 5, aes(color = Year)) +
  scale_color_continuous(low = "black", high = "purple") +
  # Year is mapped to colour (not fill), so the colourbar dimensions must be
  # set on the colour guide to take effect.
  guides(colour = guide_colourbar(barwidth = 0.7, barheight = 15)) +
  coord_flip()
library("zoo")
library(ggfortify)
library(reshape2)
# Hex-binned time-series plot of the price columns (7:81) for the first
# `num_city` rows.
num_city <- 5
values <- head(zillow, n = num_city)
# Transpose so each column is one location and each row is one month.
values <- data.frame(t(as.matrix(values[, 7:81])))
colnames(values) <- zillow[seq_len(num_city), 2]
# Monthly index from November 2010 to January 2017, one entry per price column.
date <- as.yearmon(
  seq(from = as.Date("2010/11/01"), to = as.Date("2017/01/31"), by = "month")
)
ts <- zoo(values, order.by = date)
values <- fortify(ts)
values[["Index"]] <- as.Date(values[["Index"]])
# facets = NULL draws every series in a single panel; geom_hex() adds a fill
# scale, which the viridis scale and the colourbar guide then style.
autoplot(ts, facets = NULL) +
  geom_hex(size = 1.5, alpha = 0.7) +
  scale_fill_viridis_c() +
  guides(fill = guide_colourbar(barwidth = 0.7, barheight = 15)) +
  labs(x = "Time", y = "Price") +
  theme_minimal()